***Graduate Student Survey Data Cleaning 03/08/22***

*Load data from .xls (.csv had some metadata issues) (GSSSExploratory export used for variable ease).
*The command had been broken in the middle of the sheet name by a pasted log continuation marker, which made it unrunnable; it is rejoined here.
*firstrow takes the variable names from the first row of the sheet.
import excel "/Users/calla/Dropbox/Work/Research/Grad Student Survey/GSSSExploratoryData030722.xls", ///
    sheet("GSSSExploratoryData030722") firstrow

*Data cleaning*
*Build a short program label for each respondent: start from the raw
*University string, then abbreviate the schools listed below. Any
*University value that is not listed keeps its original full name.
generate program = University

*standardize University names (listed alphabetically by short label)
replace program = "Chicago"    if University == "University of Chicago"
replace program = "Georgia"    if University == "University of Georgia"
replace program = "GW"         if University == "George Washington University"
replace program = "Iowa"       if University == "University of Iowa"
replace program = "Maryland"   if University == "University of Maryland College Park"
replace program = "Minnesota"  if University == "University of Minnesota Twin Cities"
replace program = "Notre Dame" if University == "University of Notre Dame"
replace program = "Pitt"       if University == "University of Pittsburgh"
replace program = "UC Boulder" if University == "University of Colorado Boulder"
replace program = "UMich"      if University == "University of Michigan Ann Arbor"
replace program = "UPenn"      if University == "University of Pennsylvania"
replace program = "UTAustin"   if University == "University of Texas at Austin"
replace program = "UVA"        if University == "University of Virginia"
replace program = "Wisconsin"  if University == "University of Wisconsin Madison"
replace program = "WUSTL"      if University == "Washington University in St. Louis"

*gen gender man woman trans*
*Collapse the gender-identity item into three string categories.
*"woman" is the starting value, so any non-blank response not matched below stays "woman".
*"man" is only the exact single response "Man"; "Other", "Trans and/or non-binary",
*and every multi-select combination listed below are coded "trans".
gen gender = "woman"
replace gender = "man" if Whatisyourgenderidentityc == "Man"
replace gender = "trans" if inlist(Whatisyourgenderidentityc, ///
    "Other", "Man,Other", "Trans and/or non-binary", ///
    "Man,Trans and/or non-binary", "Man,Woman", ///
    "Woman,Other", "Woman,Trans and/or non-binary")
*Respondents who left the item blank must not inherit the "woman" default:
*set them to the empty string so they are treated as missing rather than counted as women.
replace gender = "" if Whatisyourgenderidentityc == ""

*gen POC/white
*POC is 0 only when the race selection is exactly "White" and the respondent
*did not report Hispanic or Latinx ethnicity; every other respondent is 1.
gen POC = (RacecheckallthatapplySe != "White") | (Ethnicity == "Hispanic or Latinx")

*gen private public
*public = 1 unless the program is one of the private universities listed below.
*The comparison is against program, so schools that were abbreviated during
*name standardization must be matched by their short label. George Washington
*was previously tested by its full name, which program never holds after
*standardization, so it was left coded as public; it is matched as "GW" here.
*NOTE(review): "MIT" and "John Hopkins University" only match if the raw
*University column holds exactly these strings - confirm against the data.
gen public = 1

*private programs stored under a short label
replace public = 0 if inlist(program, "GW", "UPenn", "Chicago", "Notre Dame", "WUSTL", "MIT")

*private programs stored under the full university name
*(split in two because inlist accepts only a limited number of string arguments)
replace public = 0 if inlist(program, ///
    "Brown University", "Yale University", "Harvard University", ///
    "Cornell University", "Duke University", "Syracuse University", ///
    "Stanford University", "Princeton University")
replace public = 0 if inlist(program, ///
    "Northwestern University", "Georgetown University", "New York University", ///
    "Rice University", "Vanderbilt University", "Columbia University", ///
    "Emory University", "John Hopkins University")


*most "Other" write-ins on race tell us they are Latino or Asian; fill out
*the main race selection from the write-in text for the specific entries below
replace RacecheckallthatapplySe = "Black or African American" ///
    if inlist(RacecheckallthatapplyOt, "AfroCaribbean", "Biracial, Black and White")
replace RacecheckallthatapplySe = "Asian" ///
    if inlist(RacecheckallthatapplyOt, "Southeast Asian", "South Asian")

*gen race white black asian latino middle eastern other
*Single-category race variable; any response not matched below stays "Other".
*should we leave people who identified as mixed in other?
gen race = "Other"
replace race = "White" if RacecheckallthatapplySe == "White"
*Latinx is assigned before the remaining categories, so a Hispanic or Latinx
*respondent whose race selection matches a later line ends up in that later category.
replace race = "Latinx" if Ethnicity == "Hispanic or Latinx"
replace race = "Black" if RacecheckallthatapplySe == "Black or African American"
*the Asian,Other entries all list a nationality
replace race = "Asian" if inlist(RacecheckallthatapplySe, "Asian", "Asian,Other")
*The Middle Eastern,Other entries typically list a nationality;
*the Asian,Middle Eastern combination is coded Middle Eastern as well
replace race = "Middle Eastern" if inlist(RacecheckallthatapplySe, ///
    "Middle Eastern (Arab, Amazigh, Persian, etc)", ///
    "Middle Eastern (Arab, Amazigh, Persian, etc),Other", ///
    "Asian,Middle Eastern (Arab, Amazigh, Persian, etc)")

*Experimental conditions
*Each indicator is 1 when the corresponding item holds any answer ("Yes" or "No")
*and 0 otherwise.
*NOTE(review): presumably a respondent only answered the item for the condition
*they were assigned to; AT and AU look like auto-generated names for repeated
*column headers - confirm against the spreadsheet.
gen Exp1 = inlist(ThefollowingisreportingbyTh, "Yes", "No")
gen Exp2 = inlist(AT, "Yes", "No")
gen Exp3 = inlist(AU, "Yes", "No")

*Reporting
*first 28 responses on 1-100 scale; converted to 10 scale.
*A value above 10 can only come from the 1-100 version of the item, so it is
*divided by 10 and rounded to the nearest integer. This reproduces every mapping
*that used to be hand-coded here (61->6, 51->5, 100->10, 66->7, 80->8, 83->8,
*70->7, 98->10, 50->5, 67->7, 90->9, 60->6) and also converts any 1-100 value
*that was not in that list instead of leaving it above 10.
*NOTE(review): early responses of 10 or less on the 1-100 scale cannot be told
*apart from 10-point answers by value alone; they are left unchanged, as before.
gen ReportMisconduct = ReportingHowlikelyareyout
*the upper bound also keeps missing values (which compare as larger than any number) out of the rule
replace ReportMisconduct = round(ReportingHowlikelyareyout / 10) ///
    if ReportingHowlikelyareyout > 10 & ReportingHowlikelyareyout <= 100

*Turn strings into factor variables
*Each quoted pair is "source string variable, new numeric variable"; the loop
*runs encode on every pair in the order listed.
*NOTE(review): the source name containing a non-ASCII character looks like a
*mis-encoded apostrophe; it is kept exactly as it appears - confirm with describe.
foreach pair in ///
    "Statusingraduateprogram status" ///
    "gender gender1" ///
    "race race1" ///
    "DoyouidentifyasLGTBQ lgbt" ///
    "Doyouhavechildren kids" ///
    "Areyouafirstgenerationstude firstgen" ///
    "Areyouclassifiedasaninterna international" ///
    "Fieldofstudycheckallthata subfield" ///
    "Doesyouruniversityhaveagrad union" ///
    "Doesyourdepartmentâsfinanci money" ///
    "Doesyourcontractguaranteesum summer" ///
    "Doesyourdepartmentgiveyoumo research" ///
    "Haveyouexperiencedexploitativ exploitation" ///
    "Haveyouexperiencedsexualhara sexharass" ///
    "Haveyouexperiencedhomophobia homophobia" ///
    "Haveyouexperiencedracialhara racism" ///
    "Doyouintendtopursueatenure intent" ///
    "Whatisyourprimarysourceoff source" ///
    "Doyouexpecttoattainatenure expectation" {
    * split the pair into positional macros: 1 = source, 2 = new variable
    tokenize `pair'
    encode `1', gen(`2')
}

*Program rank, matched on the program label. Programs not listed below keep
*the placeholder rank of 100. Tied programs share a rank and are grouped on one line.
*NOTE(review): several labels used here (e.g. "UC Berkeley", "UCLA", "MIT",
*"Texas A&M", "Arizona") are not produced by the University-name standardization;
*they only match if the raw University column holds exactly these strings - confirm.
gen rank = 100

replace rank = 1  if program == "Stanford University"
replace rank = 2  if inlist(program, "Harvard University", "Princeton University")
replace rank = 4  if inlist(program, "UC Berkeley", "UMich")
replace rank = 6  if program == "Yale University"
replace rank = 7  if program == "MIT"
replace rank = 8  if inlist(program, "Columbia University", "UC San Diego")
replace rank = 10 if inlist(program, "Duke University", "Chicago")
replace rank = 12 if inlist(program, "UCLA", "UNC Chapel Hill", "WUSTL")
replace rank = 15 if inlist(program, "Cornell University", "New York University")
replace rank = 17 if inlist(program, "Ohio State University", "Wisconsin")
*replace rank = 19 if program=="Rochester" NO ROCHESTER
replace rank = 19 if inlist(program, "Emory University", "Northwestern University", ///
    "UPenn", "UTAustin", "Vanderbilt University")
replace rank = 25 if inlist(program, "UC Davis", "Minnesota")
replace rank = 27 if program == "University of Illinois Urbana-Champaign"
replace rank = 28 if inlist(program, "Indiana University Bloomington", "Rice University", ///
    "Stony Brook University", "Texas A&M", "Maryland", "UVA")
replace rank = 34 if inlist(program, "Georgetown University", ///
    "Pennsylvania State University", "University of Washington")
replace rank = 37 if inlist(program, "GW", "Michigan State University", "Notre Dame", "Pitt")
replace rank = 41 if inlist(program, "Brown University", "Florida State University", ///
    "John Hopkins University", "UC Irvine", "Georgia")
replace rank = 46 if inlist(program, "UC Boulder", "Iowa")
replace rank = 48 if inlist(program, "Rutgers University", "UC Riverside")
replace rank = 50 if inlist(program, "Syracuse University", "Arizona")

*top50 is 1 for exactly the programs that were given a rank above (all of them
*are 50 or better) and 0 for programs still at the placeholder rank of 100.
gen top50 = rank <= 50



***saved after these changes***
*Writes the dataset currently in memory to a .dta file. No replace option is
*given, so Stata stops with an error if this file already exists.
*NOTE(review): the output folder differs from the folder the raw .xls was read
*from, and the 062122 in the file name differs from the 03/08/22 date in the
*header - confirm this is the intended destination.
save "/Users/calla/Dropbox/Work/Papers/Working/Grad Student Survey/Data/GSSSData062122.dta"

